組員: B035040052 林榮鏵 B045020041諶彥汝 B056090039 楊禾馨 M064020056 閻俞蓉 M064111003 洪筱涵 M074111035 劉志峰


pacman::p_load(caTools, ggplot2, dplyr)
D = read.csv("data/quality.csv")  # Read in dataset
set.seed(88)
split = sample.split(D$PoorCare, SplitRatio = 0.75)  # split vector
TR = subset(D, split == TRUE)
TS = subset(D, split == FALSE)
glm1 = glm(PoorCare ~ OfficeVisits + Narcotics, TR, family=binomial)
summary(glm1)


【A】從預測到決策

Fig 13.3 - 從預測到決策

Fig 13.3 - 從預測到決策



【B】預測機率分佈 (DPP)

因為這個資料集很小,我們使用全部的資料來做模擬 (通常我們是使用測試資料集)

pred = predict(glm1, D, type="response")
y = D$PoorCare
data.frame(pred, y) %>% 
  ggplot(aes(x=pred, fill=factor(y))) + 
  geom_histogram(bins=20, col='white', position="stack", alpha=0.5) +
  ggtitle("Distribution of Predicted Probability (DPP,FULL)") +
  xlab("predicted probability")


【C】試算期望報酬

報酬矩陣 Payoff Matrix

payoff = matrix(c(0,-100,-10,-50),2,2) #TN,FN,FP,TP
payoff
     [,1] [,2]
[1,]    0  -10
[2,] -100  -50

期望報酬 Expected Payoff

cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
par(cex=0.7, mar=c(4,4,3,1))
plot(cutoff, result, type='l', col='cyan', lwd=2, main=sprintf(
  "Optomal Expected Result: $%d @ %.2f",result[i],cutoff[i]))
abline(v=seq(0,1,0.1),h=seq(-6000,0,100),col='lightgray',lty=3)
points(cutoff[i], result[i], pch=20, col='red', cex=2)


【D】策略模擬

🎯 練習:
執行Sim13.R,先依預設的報酬矩陣回答下列問題:
  【A】 最佳臨界機率是? 它所對應的期望報酬是多少?
     臨界機率:0.16,期望報酬:-2360

i = which.max(result)
cutoff[i]#臨界機率
[1] 0.16
result[i]#期望報酬
[1] -2360

  【B】 什麼都不做時,臨界機率和期望報酬各是多少?
     臨界機率:1,期望報酬:-3300

cutoff[101]#臨界機率
[1] 1
result[101]#期望報酬
[1] -3300

  【C】 每位保戶都做時,臨界機率和期望報酬各是多少?
     臨界機率:0,期望報酬:-2630

cutoff[1]#臨界機率
[1] 0
result[1]#期望報酬
[1] -2630

  【D】 以上哪一種做法的期望報酬比較高?
     每位保戶都做
  【E】 在所有的商務情境都是這種狀況嗎?
     不一定,還是要考慮到其他變數,例如成本。

藉由調整報酬矩陣:
  【F】 模擬出「全不做」比「全做」還要好的狀況
     FP = -100, TP = 0, TN = 0, FN = 0

  【G】 並舉出一個會發生這種狀況的商務情境
     風險高的開刀情況

有五種成本分別為$5, $10, $15, $20, $30的介入方法,它們分別可以將風險成本從$100降低到$70, $60, $50, $40, $25
  【H】 它們的最佳期望報酬分別是多少?

#FP=-5, TP=-75
payoff = matrix(c(0,-100,-5,-75),2,2)
cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
result[i]
[1] -2830
#FP=-10, TP=-70
payoff = matrix(c(0,-100,-10,-70),2,2)
cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
result[i]
[1] -2830
#FP=-15, TP=-65
payoff = matrix(c(0,-100,-15,-65),2,2)
cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
result[i]
[1] -2775
#FP=-20, TP=-60
payoff = matrix(c(0,-100,-20,-60),2,2)
cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
result[i]
[1] -2720
#FP=-30, TP=-55
payoff = matrix(c(0,-100,-30,-55),2,2)
cutoff = seq(0, 1, 0.01)
result = sapply(cutoff, function(p) sum(
  table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
  * payoff ))
i = which.max(result)
result[i]
[1] -2700

  【I】 哪一種介入方法的最佳期望報酬是最大的呢?
     最後一個成本30,有最佳的期望報酬$-2700 (FP=-30, TP=-55)




使用manipulate套件做策略模擬

library(manipulate)
manipulate({
  payoff = matrix(c(TN,FN,FP,TP),2,2)
  cutoff = seq(0, 1, 0.01)
  result = sapply(cutoff, function(p) sum(
    table(factor(y==1, c(F,T)), factor(pred>p, c(F,T))) # confusion matrix
    * payoff ))
  i = which.max(result)
  par(cex=0.7)
  plot(cutoff, result, type='l', col='cyan', lwd=2, main=sprintf(
    "Optomal Expected Result: $%d @ %.2f",result[i],cutoff[i]))
  abline(v=seq(0,1,0.1),h=seq(-10000,0,100),col='lightgray',lty=3)
  points(cutoff[i], result[i], pch=20, col='red', cex=2)
  },
  TN = slider(-100,0,   0,step=5),
  FN = slider(-100,0,-100,step=5),
  FP = slider(-100,0, -10,step=5),
  TP = slider(-100,0, -50,step=5)
  )